clear all
set type double
set seed 1234

*import delimited using bidsummary_13jul20.csv, clear
import delimited ../Data/caltrans/bidsummary.csv, clear
gen line_id = _n
*import delimited using bid_award.csv, clear

cap drop deadline_bidding
gen deadline_bidding = date(bid_opening_date, "MD20Y")
format deadline_bidding %td

egen id = group(bid_opening_date contract_number)

gen bidder = bidder_info + " " + bidder_info_2
rename bidder_info_3 bidder_address1
rename bidder_info_4 bidder_address2
cap drop bidder_tel
gen bidder_tel = ustrregexs(1) if ustrregexm(bidder_info_cont, "([0-9]{3} [0-9]{3}-[0-9]{4})")


cap drop A_pl_B
gen A_pl_B = bid_total_5 if ustrregexm(bid_total_5, "^[0-9,\.]+$")
order A_pl_B

cap drop COMPARISON_AMT
gen COMPARISON_AMT = ustrregexs(1) if ustrregexm(bidder_info_cont_4,"([0-9,.]+)") 
replace COMPARISON_AMT = "" if ustrregexm(bidder_info_cont_3, "COMPARISON AMT:") == 0
gen record_comparison_amt = "comparison_amt" if COMPARISON_AMT != ""
replace COMPARISON_AMT = A_pl_B if COMPARISON_AMT == ""
replace COMPARISON_AMT = bid_total if COMPARISON_AMT == ""
destring COMPARISON_AMT, replace ignore(",")
sort id COMPARISON_AMT line_id
*by id: gen bid_rank_comp = _n


cap drop scoring
gen scoring = 1 if ustrregexm(bid_rank, "[0-9]+ A\)")
replace bid_rank = usubinstr(bid_rank, " A)","",.)
destring bid_rank, replace

tempfile tempf
preserve
	import delimited ../Data/caltrans/award_14jul20.csv, clear encoding("utf8")
	replace contract_number = usubinstr(contract_number, " </strong>", "",.)
	duplicates drop
	gen deadline_bidding = date(bid_opening_date, "MD20Y")
	replace phone_number = ustrregexs(1) + " " + ustrregexs(2) if ustrregexm(phone_number, "\(([0-9]{3})\)([0-9]{3}\-[0-9]{4})")
	save `tempf', replace
restore
merge m:1 deadline_bidding contract_number using `tempf'

drop if _merge == 2

rename phone_number awarder_tel

* needs award info
gen winner = (awarder_tel == bidder_tel)
tab bid_rank if winner == 1


bys deadline_bidding contract_number: egen multi_auctions = total(winner)

order id bid_rank* winner awarder bidder
tab bid_rank if winner == 1
gen lowbid_not_winner = 1 if bid_rank ~= 1 & winner == 1
bys pdfname: egen lownotwinner_auction = max(lowbid_not_winner)
sort deadline_bidding contract_number
format deadline_bidding %td
order bidder_id _merge deadline_bidding contract_number multi_auctions lownotwinner_auction

/* one auction in a day but winner is the lowest bidrank_comp that takes into account comparison amount */
	*replace bid_rank = bid_rank_comp if multi_auctions == 1 & lownotwinner_auction == 1
/* multi auctions a day -> drop the auction in which low bidder is not the winner*/
drop if multi_auctions == 2 & lownotwinner_auction == 1

tab bid_rank if winner == 1
/* to be solved*/
*deadline_bidding	contract_number
*24jan2018	03-0G8804

/* multiple auctions with identical contract_number 

   BID_RANK |      Freq.     Percent        Cum.
------------+-----------------------------------
          1 |        948       99.89       99.89
          3 |          1        0.11      100.00
------------+-----------------------------------
      Total |        949      100.00
*/


gen year = year(deadline_bidding)

destring(bid_total), gen(bid) ignore(",")
destring(engineers_est), gen(est) ignore(",")
destring(COMPARISON_AMT), gen(comparison_amt) ignore(",")
replace awarded_amount = usubinstr(awarded_amount,"$","",.)
destring(awarded_amount), replace ignore(",")


gen pct_bid = COMPARISON_AMT / est

sort id pdfname bid_rank line_id
sort deadline_bidding contract_number bidder_id bid_total pdfname line_id

order year deadline_bidding id contract_number bidder_id COMPARISON_AMT bid pct_bid est 

cap drop d_n
by deadline_bidding contract_number bidder_id bid_total: gen d_n = _n
order d_n
keep if d_n == 1
duplicates tag id deadline_bidding bidder_id bid_rank, gen(dddd)
tab dddd


cap drop temp
gen temp = COMPARISON_AMT if winner == 1
bys id: egen comparison_amt_winner = max(temp)
drop temp
cap drop comparison_amt_rd
gen comparison_amt_rd = COMPARISON_AMT if COMPARISON_AMT >= comparison_amt_winner & COMPARISON_AMT < .
sort id comparison_amt_rd line_id
cap drop bid_rank_rd
by id: gen bid_rank_rd = _n


sort id
forvalues i = 1(1)4{
	cap drop ttt
	cap drop pct_bid`i'
	gen ttt= pct_bid if bid_rank_rd == `i'
	by id: egen pct_bid`i' = max(ttt)
	cap drop ttt
}
cap drop temp*
gen temp = pct_bid if winner == 1
bys id: egen pct_winbid = max(temp)
drop temp
gen temp = bid if winner == 1
by id: egen winbid = max(temp)

cap drop delta
gen delta = pct_bid - pct_bid1 if bid_rank_rd > 1 & bid_rank_rd < .
replace delta = pct_bid - pct_bid2 if bid_rank_rd == 1

gen delta23 = pct_bid - pct_bid2 if bid_rank_rd > 2 & bid_rank_rd < .
replace delta23 = pct_bid - pct_bid3 if bid_rank_rd == 2

cap drop temp 
g temp = bid if bid_rank_rd == 1 
bys id: egen lowest = max(temp)
gen pct_lbid = COMPARISON_AMT / lowest 

forvalues i = 1(1)4{
	cap drop ttt
	cap drop pct_lbid`i'
	gen ttt= pct_lbid if bid_rank_rd == `i'
	by id: egen pct_lbid`i' = max(ttt)
	cap drop ttt
}

cap drop ldelta 
gen ldelta = pct_lbid - pct_lbid1 if bid_rank_rd > 1 & bid_rank_rd < .
replace ldelta = pct_lbid - pct_lbid2 if bid_rank_rd == 1

gen ldelta23 = pct_lbid - pct_lbid2 if bid_rank_rd > 2 & bid_rank_rd < .
replace delta23 = pct_lbid - pct_lbid3 if bid_rank_rd == 2

**************** delta based on winner ****************
sort id bid_rank line_id
gen bid_rank_win = 1 if winner == 1
by id: replace bid_rank_win = bid_rank_win[_n-1]+1 if bid_rank_win[_n-1] < . 

*hist delta if abs(delta) < .2 & winner == 0, bin(200)
*hist delta if abs(delta) < .2 & winner == 1, bin(200)

gen low_bidder = (bid_rank == 1)
cap drop ttt
gen ttt = bid if bid_rank == 1
bys id: egen lowbid = max(ttt)
cap drop ttt

cap drop deadline_bidding
gen deadline_bidding = date(bid_opening_date, "MD20Y")
egen firm_id = group(bidder bidder_address2)
egen firm_tg = tag(firm_id)

bys id: egen N_bids = total(bid < .)

sort deadline_bidding n bid_rank
order irregular deadline_bidding contract_number bid_rank bid_total COMPARISON_AMT bid bidder_id bidder_info bidder_info_cont bidrank0
format deadline_bidding %td
drop if _merge == 2
rename _merge _merge_bidtab

sort deadline_bidding contract_number bidder_id bid_rank

compress

cap drop FLAG
gen FLAG = 0

rename deadline_bidding bid_date
format bid_date %td
keep if bid_date > td(1jan2011)
rename id contract_id

gen Bwin = (bid_rank_rd == 1)
tempvar tempv
gen `tempv' = bid if Bwin == 1
bys contract_id: egen Bwinbid = max(`tempv')
drop `tempv'

************* backlog construction module *****************
do backlog_construction.do 
backlog_construction "Bwinbid" "Bwin"
***********************************************************
************* fowardlog construction module ***************
do forwardlog_construction.do
fowardlog_construction "Bwinbid" "Bwin" 
***********************************************************

gen d_bridge = ustrregexm(type, "BRIDGE")
gen d_paving = ustrregexm(type, "(?:PAVEMENT|PAVING|ASPHALT)")
gen d_landscaping = ustrregexm(type, "(?:PLANTING|LANDSCAP)")
gen d_electoric = ustrregexm(type, "(SIGNAL|LIGHTING)")
gen d_slope = ustrregexm(type, "SLOPE")
gen d_painting = ustrregexm(type, "(?:CENTER)?LINE ")

save ../Data/caltrans/sample_caltrans29Dec24.dta, replace

duplicates tag bid_date contract_number bidder_id, gen(d1)
duplicates tag bid_date contract_number bid_rank, gen(d2)
tab d1
tab d2
